import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import sys
import nltk
import numpy as np
import sklearn
import warnings
warnings.filterwarnings("ignore")
import string
import contractions
from nltk.corpus import stopwords, wordnet
import pickle
import pyLDAvis.sklearn
from collections import Counter
from textblob import TextBlob
from nltk.probability import FreqDist
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation, NMF
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
pd.options.mode.chained_assignment = None
pd.set_option('display.max_colwidth', 100)
df = pd.read_csv(r'qa_senti_analysis.csv',encoding='latin1')
df.head()
| freshdesk_id | title | created | clean_content | happiness_rating | |
|---|---|---|---|---|---|
| 0 | 2073 | LMS content issue reported for Presentation_PPT of DevOps Workshop | 4/24/2020 12:08 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
| 1 | 2074 | LMS content issue reported for Presentation_PPT of Ansible Certification Training | 4/24/2020 12:15 | Dear IRD Team -<br><br><br /> User version@tech.edureka.in has raised a query... | NaN |
| 2 | 2075 | LMS content issue reported for Presentation - Random Forest_PPT of Data Science Workshop | 4/24/2020 12:05 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
| 3 | 2076 | When will the grades be out for the PGP? | 4/24/2020 12:15 | I have submitted the final project When will the evaluation process complete? | NaN |
| 4 | 2077 | LMS content issue reported for Presentation_PPT of DevOps Workshop | 4/24/2020 12:16 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
df.isna().sum()
freshdesk_id 0 title 18 created 0 clean_content 2916 happiness_rating 375858 dtype: int64
df.dropna(subset=["clean_content"],inplace=True)
df.isna().sum()
freshdesk_id 0 title 17 created 0 clean_content 0 happiness_rating 373149 dtype: int64
df1 = df[['freshdesk_id','clean_content','happiness_rating']]
df1
| freshdesk_id | clean_content | happiness_rating | |
|---|---|---|---|
| 0 | 2073 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
| 1 | 2074 | Dear IRD Team -<br><br><br /> User version@tech.edureka.in has raised a query... | NaN |
| 2 | 2075 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN |
| 4 | 2077 | Dear IRD Team -<br><br><br /> User barath.star81@gmail.com has raised a query... | NaN |
| ... | ... | ... | ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN |
| 436030 | 1020410 | Dear IRD Team -<br><br><br /> User sbonageri@gmail.com has raised a query wit... | NaN |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN |
433117 rows × 3 columns
df1.drop(df1[df1['clean_content'].str.contains("Dear IRD Team")].index,inplace = True)
df1.drop(df1[df1['clean_content'].str.contains("dear team")].index,inplace = True)
df1.drop(df1[df1['clean_content'].str.contains("sdddddddddddddddd")].index,inplace = True)
df1.drop(df1[df1['clean_content'].str.contains("Content URL:https://wwwupload.s3.amazonaws.com/Library%2F2roi4.ipynb")].index,inplace = True)
df1.drop(df1[df1['clean_content'].str.contains("/Users/")].index,inplace = True)
df1['clean_content'] = df1['clean_content'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | |
|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good |
| ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN |
421829 rows × 3 columns
#df1.reset_index(drop=True)
#spec_chars = ["!",'"',"#","%","&","'","(",")",
# "*","+",",","-",".","/",":",";","<",
# "=",">","?","@","[","\\","]","^","_",
# "`","{","|","}","~","–","?","<br>","<br/>"]
#for char in spec_chars:
#df1['title'] = df['clean_content'].str.replace(char, ' ')
#df1
df1['no_contract'] = df1['clean_content'].apply(lambda x: [contractions.fix(word) for word in x.split()])
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | |
|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... |
| ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] |
421829 rows × 4 columns
df1['clean_content_str'] = [' '.join(map(str, l)) for l in df1['no_contract']]
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | I have submitted the final project When will the evaluation process complete? |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once as soon as possi... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | My assignment is not getting verified because of which I am not able to get my certificate. Have... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | Can we extend the lab access beyond the initial 60 days? |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello Can you please give a retake for assignment 13 from NLP module ? |
421829 rows × 5 columns
# use regular expressions to replace email addresses, URLs, phone numbers, other numbers
# Replace email addresses with 'email'
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | I have submitted the final project When will the evaluation process complete? |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once as soon as possi... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | My assignment is not getting verified because of which I am not able to get my certificate. Have... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | Can we extend the lab access beyond the initial 60 days? |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello Can you please give a retake for assignment 13 from NLP module ? |
421829 rows × 5 columns
# Replace money symbols with 'moneysymb' (£ can by typed with ALT key + 156)
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'£|\$', ' ')
# Replace 10 digit phone numbers (formats include paranthesis, spaces, no spaces, dashes) with 'phonenumber'
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'^\(?[\d]{3}\)?[\s-]?[\d]{3}[\s-]?[\d]{4}$',' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | I have submitted the final project When will the evaluation process complete? |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once as soon as possi... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | My assignment is not getting verified because of which I am not able to get my certificate. Have... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | Can we extend the lab access beyond the initial 60 days? |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello Can you please give a retake for assignment 13 from NLP module ? |
421829 rows × 5 columns
# Remove punctuation
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'[^\w\d\s]', ' ')
# Replace whitespace between terms with a single space
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
# Remove leading and trailing whitespace
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'^\s+|\s+?$', '')
# change words to lower case - Hello, HELLO, hello are all the same word
df1['clean_content_str'] = df1['clean_content_str'].str.lower()
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team br br i opted for a full stack web development course on wednesday 22nd april and after ... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team br i am unable to play 2nd recording video br could you check once as soon as possible b... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team br br i am facing difficulty in submitting the below question while execution it gives n... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team br br i request for please update me on pmi exams pattern will change from 30th june20... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team br br i would like to reattend the aws class again br please have a look and let me know... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" br ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team br i opted for a full stack web development course on wednesday 22nd april and after enr... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible br than... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team br i am facing difficulty in submitting the below question while execution it gives no e... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team br i request for please update me on pmi exams pattern will change from 30th june2020 ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team br i would like to reattend the aws class again please have a look and let me know the t... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team br i opted for a full stack web development course on wednesday 22nd april and after enr... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible br than... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team br i am facing difficulty in submitting the below question while execution it gives no e... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team br i request for please update me on pmi exams pattern will change from 30th june2020 ... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team br i would like to reattend the aws class again please have a look and let me know the t... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" br ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" br br",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" br ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r"div style font size 13px font family regular lato arial helvetica sans serif ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r"div span class x_690327091size style font size 16px line height normal ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" div ",' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r"^div ",' ')t div div div
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" div div div ",' ') div div
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" div div",' ') div
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" div ",' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday 22nd april and after enroll... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play 2nd recording video could you check once as soon as possible thanks ... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from 30th june2020 reg... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module 3 and 4 for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial 60 days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment 13 from nlp module |
421829 rows × 5 columns
c = df1[df1['clean_content_str'].str.contains(" div ")]
c
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str |
|---|
#df1['clean_content'] = df1['clean_content'].str.replace(r'^URL:https://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$','')
#df1['clean_content'] = df1['clean_content'].str.replace(r'^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$','')
#df1['clean_content'] = df1['clean_content'].str.replace(r'\s+', ' ')
#df1
# Replace numbers with ''
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\d+(\.\d+)?', '')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | hi team i opted for a full stack web development course on wednesday nd april and after enrollin... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | hi team i am unable to play nd recording video could you check once as soon as possible thanks c... |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | hi team i am facing difficulty in submitting the below question while execution it gives no erro... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | dear team i request for please update me on pmi exams pattern will change from th june regards m... |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module and for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | hi team i would like to reattend the aws class again please have a look and let me know the timi... |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | hello can you please give a retake for assignment from nlp module |
421829 rows × 5 columns
b = df1[df1['clean_content_str'].str.contains(" br ")]
b
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 78 | 2158 | Dear Edureka<br /><br />i had share case study 2 and case study 3<br />please help me through th... | NaN | [Dear, Edureka<br, /><br, />i, had, share, case, study, 2, and, case, study, 3<br, />please, hel... | dear edureka i had share case study 2 and case study 3 please help me through that code i apprec... |
| 267 | 2351 | Hi Team <br /><br />I am trying to create below customize template <br /><br /><br /><br />{<br ... | NaN | [Hi, Team, <br, /><br, />I, am, trying, to, create, below, customize, template, <br, /><br, /><b... | hi team i am trying to create below customize template br schema https schema management azure c... |
| 281 | 2365 | Hi Kunal<br /><br />Tried with code which one u shared earlier but facing same issue. I want to ... | NaN | [Hi, Kunal<br, /><br, />Tried, with, code, which, one, you, shared, earlier, but, facing, same, ... | hi kunal tried with code which one you shared earlier but facing same issue i want to know how t... |
| 666 | 2753 | Hi<br /><br />In the case study you are asking us to:<br />Build a system where when user enters... | NaN | [Hi<br, /><br, />In, the, case, study, you, are, asking, us, to:<br, />Build, a, system, where, ... | hi in the case study you are asking us to build a system where when user enters reference id it ... |
| 767 | 2857 | ubuntu@ip-172-31-29-167:~/Dockercompose$ docker-compose up<br />ERROR: yaml.reader.ReaderError: ... | NaN | [ubuntu@ip-172-31-29-167:~/Dockercompose$, docker-compose, up<br, />ERROR:, yaml.reader.ReaderEr... | ubuntu ip 172 31 29 167 dockercompose docker compose up error yaml reader readererror unacceptab... |
| ... | ... | ... | ... | ... | ... |
| 434443 | 1017399 | Hi<br />In the below code I have name as an global variable and updated the name <br />But when ... | NaN | [Hi<br, />In, the, below, code, I, have, name, as, an, global, variable, and, updated, the, name... | hi in the below code i have name as an global variable and updated the name but when i want to d... |
| 434527 | 1017584 | Hi<br /><br />I am facing an issue with implementing disk quota on user home folder that are hos... | NaN | [Hi<br, /><br, />I, am, facing, an, issue, with, implementing, disk, quota, on, user, home, fold... | hi i am facing an issue with implementing disk quota on user home folder that are hosted on nfs ... |
| 434650 | 1017789 | I have written a simple Java code to print the even and odd numbers below 20 and I am separating... | NaN | [I, have, written, a, simple, Java, code, to, print, the, even, and, odd, numbers, below, 20, an... | i have written a simple java code to print the even and odd numbers below 20 and i am separating... |
| 435413 | 1019469 | Hi<br /><br />Please get back to me on the following.<br /><br /><br />Introduction to Python Ca... | NaN | [Hi<br, /><br, />Please, get, back, to, me, on, the, following.<br, /><br, /><br, />Introduction... | hi please get back to me on the following introduction to python case study 1 question 4 the ans... |
| 435522 | 1019702 | Hi<br /><br />I am getting below error message while pyspark and spark-shell.<br />Could you ple... | NaN | [Hi<br, /><br, />I, am, getting, below, error, message, while, pyspark, and, spark-she will.<br,... | hi i am getting below error message while pyspark and spark she will could you please help me re... |
1253 rows × 5 columns
df1['clean_content_str'][423638]
' the learner is not able to login through lms for last 3 4 classes span nbsp style color rgb 34 34 34 font family arial helvetica sans serif font size small font style normal font variant ligatures normal font variant caps normal font weight 400 letter spacing normal orphans 2 text indent 0px text transform none white space normal widows 2 word spacing 0px background color rgb 255 255 255 text decoration color initial dir auto when the learner tries to join the session it just keeps on loading and never opens up the session nbsp div style cursor pointer outline none padding 10px 0px width 22px margin 2px 0px 0px color rgb 34 34 34 font family arial helvetica sans serif font size small font style normal font variant ligatures normal font variant caps normal font weight 400 letter spacing normal orphans 2 text indent 0px text transform none white space normal widows 2 word spacing 0px background color rgb 255 255 255 text decoration color initial class x_690327091yj6qo x_690327091aju class x_690327091apple interchange newline div'
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' please ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'hi team ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'dear team ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'hello ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'^hi ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' hi ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'^hey ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' hey ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' regards ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' st ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' th ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' nd ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' rd ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' muralikrishna ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' vinod ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' chandan ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' jan ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' feb ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' march ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' april ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' may ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' june ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' july ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' august ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' september ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' october ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' november ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' december ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' edureka', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'][420956]
' we had received a deadline along with the respective project details of to complete and submit the project if we are to be eligible to receive the completion certificate however considering the year end work and the fact that we are now having additional sessions request you to kindly extend the project submission deadline to attached is the email request sent in this regard to mukesh sahu of '
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' mukesh sahu ',' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' day ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' thank ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r' thanks ', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'_', '')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | i request for update me on pmi exams pattern will change from muralikrishna |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module and for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | i would like to reattend the aws class again have a look and let me know the timings n |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | can you give a retake for assignment from nlp module |
421829 rows × 5 columns
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" url ", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" http ", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" amazonaws ", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" com ", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r"www", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r" zip ", ' ')
df1['clean_content_str'] = df1['clean_content_str'].str.replace(r'\s+', ' ')
df1
| freshdesk_id | clean_content | happiness_rating | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | I have submitted the final project When will the evaluation process complete? | NaN | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | NaN | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... |
| 9 | 2082 | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | NaN | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan |
| 11 | 2084 | My assignment is not getting verified because of which I am not able to get my certificate. Have... | NaN | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | Good | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | NaN | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | i request for update me on pmi exams pattern will change from muralikrishna |
| 436028 | 1020408 | need reference notes for module 3 and 4 for reference | NaN | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module and for reference |
| 436029 | 1020409 | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | NaN | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | i would like to reattend the aws class again have a look and let me know the timings n |
| 436031 | 1020411 | Can we extend the lab access beyond the initial 60 days? | NaN | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial days |
| 436032 | 1020412 | hello Can you please give a retake for assignment 13 from NLP module ? | NaN | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | can you give a retake for assignment from nlp module |
421829 rows × 5 columns
#from nltk.sentiment.vader import SentimentIntensityAnalyzer
#nltk.download('vader_lexicon')
#sent = SentimentIntensityAnalyzer()
#polarity = [round(sent.polarity_scores(i)['compound'], 2) for i in df1['clean_content']]
#df1['sentiment_score'] = polarity
#df1.head()
#df1.to_csv("qa_sentiment_scores_file.csv",index=False)
for col in df1.columns:
print(col, df1[col].isnull().sum())
freshdesk_id 0 clean_content 0 happiness_rating 363375 no_contract 0 clean_content_str 0
rws = df1.loc[:, ['freshdesk_id', 'happiness_rating','clean_content','no_contract', 'clean_content_str']]
rws
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | |
|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... |
| ... | ... | ... | ... | ... | ... |
| 436027 | 1020407 | NaN | Dear team<br /><br /> I request for please update me on PMI exams pattern will change from 30th ... | [Dear, team<br, /><br, />, I, request, for, please, update, me, on, PMI, exams, pattern, will, c... | i request for update me on pmi exams pattern will change from muralikrishna |
| 436028 | 1020408 | NaN | need reference notes for module 3 and 4 for reference | [need, reference, notes, for, module, 3, and, 4, for, reference] | need reference notes for module and for reference |
| 436029 | 1020409 | NaN | Hi team<br /><br />i would like to reattend the aws class again.<br />please have a look and let... | [Hi, team<br, /><br, />i, would, like, to, reattend, the, aws, class, again.<br, />please, have,... | i would like to reattend the aws class again have a look and let me know the timings n |
| 436031 | 1020411 | NaN | Can we extend the lab access beyond the initial 60 days? | [Can, we, extend, the, lab, access, beyond, the, initial, 60, days?] | can we extend the lab access beyond the initial days |
| 436032 | 1020412 | NaN | hello Can you please give a retake for assignment 13 from NLP module ? | [hello, Can, you, please, give, a, retake, for, assignment, 13, from, NLP, module, ?] | can you give a retake for assignment from nlp module |
421829 rows × 5 columns
rws['tokenized'] = rws['clean_content_str'].apply(word_tokenize)
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | |
|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... |
punc = string.punctuation
rws['no_punc'] = rws['tokenized'].apply(lambda x: [word for word in x if word not in punc])
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | |
|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... |
stop_words = set(stopwords.words('english'))
rws['stopwords_removed'] = rws['no_punc'].apply(lambda x: [word for word in x if word not in stop_words])
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | stopwords_removed | |
|---|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [submitted, final, project, evaluation, process, complete] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [opted, full, stack, web, development, course, wednesday, enrolling, came, know, nothing, java, ... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [unable, play, recording, video, could, check, soon, possible, chandan] |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [assignment, getting, verified, able, get, certificate, look, please] |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [facing, difficulty, submitting, question, execution, gives, error, message, submitting, fails, ... |
rws['pos_tags'] = rws['stopwords_removed'].apply(nltk.tag.pos_tag)
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | stopwords_removed | pos_tags | |
|---|---|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [submitted, final, project, evaluation, process, complete] | [(submitted, VBN), (final, JJ), (project, NN), (evaluation, NN), (process, NN), (complete, JJ)] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [opted, full, stack, web, development, course, wednesday, enrolling, came, know, nothing, java, ... | [(opted, VBN), (full, JJ), (stack, NN), (web, NN), (development, NN), (course, NN), (wednesday, ... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [unable, play, recording, video, could, check, soon, possible, chandan] | [(unable, JJ), (play, NN), (recording, VBG), (video, NN), (could, MD), (check, VB), (soon, RB), ... |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [assignment, getting, verified, able, get, certificate, look, please] | [(assignment, NN), (getting, VBG), (verified, VBN), (able, JJ), (get, NN), (certificate, JJ), (l... |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [facing, difficulty, submitting, question, execution, gives, error, message, submitting, fails, ... | [(facing, VBG), (difficulty, NN), (submitting, VBG), (question, NN), (execution, NN), (gives, VB... |
def get_wordnet_pos(tag):
if tag.startswith('J'):
return wordnet.ADJ
elif tag.startswith('V'):
return wordnet.VERB
elif tag.startswith('N'):
return wordnet.NOUN
elif tag.startswith('R'):
return wordnet.ADV
else:
return wordnet.NOUN
rws['wordnet_pos'] = rws['pos_tags'].apply(lambda x: [(word, get_wordnet_pos(pos_tag)) for (word, pos_tag) in x])
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | stopwords_removed | pos_tags | wordnet_pos | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [submitted, final, project, evaluation, process, complete] | [(submitted, VBN), (final, JJ), (project, NN), (evaluation, NN), (process, NN), (complete, JJ)] | [(submitted, v), (final, a), (project, n), (evaluation, n), (process, n), (complete, a)] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [opted, full, stack, web, development, course, wednesday, enrolling, came, know, nothing, java, ... | [(opted, VBN), (full, JJ), (stack, NN), (web, NN), (development, NN), (course, NN), (wednesday, ... | [(opted, v), (full, a), (stack, n), (web, n), (development, n), (course, n), (wednesday, n), (en... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [unable, play, recording, video, could, check, soon, possible, chandan] | [(unable, JJ), (play, NN), (recording, VBG), (video, NN), (could, MD), (check, VB), (soon, RB), ... | [(unable, a), (play, n), (recording, v), (video, n), (could, n), (check, v), (soon, r), (possibl... |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [assignment, getting, verified, able, get, certificate, look, please] | [(assignment, NN), (getting, VBG), (verified, VBN), (able, JJ), (get, NN), (certificate, JJ), (l... | [(assignment, n), (getting, v), (verified, v), (able, a), (get, n), (certificate, a), (look, n),... |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [facing, difficulty, submitting, question, execution, gives, error, message, submitting, fails, ... | [(facing, VBG), (difficulty, NN), (submitting, VBG), (question, NN), (execution, NN), (gives, VB... | [(facing, v), (difficulty, n), (submitting, v), (question, n), (execution, n), (gives, v), (erro... |
wnl = WordNetLemmatizer()
rws['lemmatized'] = rws['wordnet_pos'].apply(lambda x: [wnl.lemmatize(word, tag) for word, tag in x])
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | stopwords_removed | pos_tags | wordnet_pos | lemmatized | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [submitted, final, project, evaluation, process, complete] | [(submitted, VBN), (final, JJ), (project, NN), (evaluation, NN), (process, NN), (complete, JJ)] | [(submitted, v), (final, a), (project, n), (evaluation, n), (process, n), (complete, a)] | [submit, final, project, evaluation, process, complete] |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [opted, full, stack, web, development, course, wednesday, enrolling, came, know, nothing, java, ... | [(opted, VBN), (full, JJ), (stack, NN), (web, NN), (development, NN), (course, NN), (wednesday, ... | [(opted, v), (full, a), (stack, n), (web, n), (development, n), (course, n), (wednesday, n), (en... | [opt, full, stack, web, development, course, wednesday, enrol, come, know, nothing, java, devlop... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [unable, play, recording, video, could, check, soon, possible, chandan] | [(unable, JJ), (play, NN), (recording, VBG), (video, NN), (could, MD), (check, VB), (soon, RB), ... | [(unable, a), (play, n), (recording, v), (video, n), (could, n), (check, v), (soon, r), (possibl... | [unable, play, record, video, could, check, soon, possible, chandan] |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [assignment, getting, verified, able, get, certificate, look, please] | [(assignment, NN), (getting, VBG), (verified, VBN), (able, JJ), (get, NN), (certificate, JJ), (l... | [(assignment, n), (getting, v), (verified, v), (able, a), (get, n), (certificate, a), (look, n),... | [assignment, get, verify, able, get, certificate, look, please] |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [facing, difficulty, submitting, question, execution, gives, error, message, submitting, fails, ... | [(facing, VBG), (difficulty, NN), (submitting, VBG), (question, NN), (execution, NN), (gives, VB... | [(facing, v), (difficulty, n), (submitting, v), (question, n), (execution, n), (gives, v), (erro... | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... |
rws['lemma_str'] =[' '.join(map(str, l)) for l in rws['lemmatized']]
rws.head()
| freshdesk_id | happiness_rating | clean_content | no_contract | clean_content_str | tokenized | no_punc | stopwords_removed | pos_tags | wordnet_pos | lemmatized | lemma_str | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 2076 | NaN | I have submitted the final project When will the evaluation process complete? | [I, have, submitted, the, final, project, When, will, the, evaluation, process, complete?] | i have submitted the final project when will the evaluation process complete | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [i, have, submitted, the, final, project, when, will, the, evaluation, process, complete] | [submitted, final, project, evaluation, process, complete] | [(submitted, VBN), (final, JJ), (project, NN), (evaluation, NN), (process, NN), (complete, JJ)] | [(submitted, v), (final, a), (project, n), (evaluation, n), (process, n), (complete, a)] | [submit, final, project, evaluation, process, complete] | submit final project evaluation process complete |
| 6 | 2079 | NaN | Hi team<br /><br /> I opted for a Full stack web development course on Wednesday 22nd April and ... | [Hi, team<br, /><br, />, I, opted, for, a, Full, stack, web, development, course, on, Wednesday,... | i opted for a full stack web development course on wednesday and after enrolling into it only i... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [i, opted, for, a, full, stack, web, development, course, on, wednesday, and, after, enrolling, ... | [opted, full, stack, web, development, course, wednesday, enrolling, came, know, nothing, java, ... | [(opted, VBN), (full, JJ), (stack, NN), (web, NN), (development, NN), (course, NN), (wednesday, ... | [(opted, v), (full, a), (stack, n), (web, n), (development, n), (course, n), (wednesday, n), (en... | [opt, full, stack, web, development, course, wednesday, enrol, come, know, nothing, java, devlop... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... |
| 9 | 2082 | NaN | Hi Team<br />I am unable to play 2nd recording video.<br />Could you check once asap.<br /><br /... | [Hi, Team<br, />I, am, unable, to, play, 2nd, recording, video.<br, />Could, you, check, once, a... | i am unable to play recording video could you check once as soon as possible chandan | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [i, am, unable, to, play, recording, video, could, you, check, once, as, soon, as, possible, cha... | [unable, play, recording, video, could, check, soon, possible, chandan] | [(unable, JJ), (play, NN), (recording, VBG), (video, NN), (could, MD), (check, VB), (soon, RB), ... | [(unable, a), (play, n), (recording, v), (video, n), (could, n), (check, v), (soon, r), (possibl... | [unable, play, record, video, could, check, soon, possible, chandan] | unable play record video could check soon possible chandan |
| 11 | 2084 | NaN | My assignment is not getting verified because of which I am not able to get my certificate. Have... | [My, assignment, is, not, getting, verified, because, of, which, I, am, not, able, to, get, my, ... | my assignment is not getting verified because of which i am not able to get my certificate have ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [my, assignment, is, not, getting, verified, because, of, which, i, am, not, able, to, get, my, ... | [assignment, getting, verified, able, get, certificate, look, please] | [(assignment, NN), (getting, VBG), (verified, VBN), (able, JJ), (get, NN), (certificate, JJ), (l... | [(assignment, n), (getting, v), (verified, v), (able, a), (get, n), (certificate, a), (look, n),... | [assignment, get, verify, able, get, certificate, look, please] | assignment get verify able get certificate look please |
| 13 | 2087 | Good | Hi Team<br /><br />I am facing difficulty in submitting the below question. While execution it g... | [Hi, Team<br, /><br, />I, am, facing, difficulty, in, submitting, the, below, question., While, ... | i am facing difficulty in submitting the below question while execution it gives no error messa... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [i, am, facing, difficulty, in, submitting, the, below, question, while, execution, it, gives, n... | [facing, difficulty, submitting, question, execution, gives, error, message, submitting, fails, ... | [(facing, VBG), (difficulty, NN), (submitting, VBG), (question, NN), (execution, NN), (gives, VB... | [(facing, v), (difficulty, n), (submitting, v), (question, n), (execution, n), (gives, v), (erro... | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... | face difficulty submit question execution give error message submit fails test case let know wro... |
rws.to_csv('qa_sentiment_analysis_EDA_main4.csv')
df3 = rws[['freshdesk_id','happiness_rating', 'lemmatized','lemma_str']]
df3.head()
| freshdesk_id | happiness_rating | lemmatized | lemma_str | |
|---|---|---|---|---|
| 3 | 2076 | NaN | [submit, final, project, evaluation, process, complete] | submit final project evaluation process complete |
| 6 | 2079 | NaN | [opt, full, stack, web, development, course, wednesday, enrol, come, know, nothing, java, devlop... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... |
| 9 | 2082 | NaN | [unable, play, record, video, could, check, soon, possible, chandan] | unable play record video could check soon possible chandan |
| 11 | 2084 | NaN | [assignment, get, verify, able, get, certificate, look, please] | assignment get verify able get certificate look please |
| 13 | 2087 | Good | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... | face difficulty submit question execution give error message submit fails test case let know wro... |
df3['sentiment'] = df3['lemma_str'].apply(lambda x: TextBlob(x).sentiment.polarity)
df3.head()
| freshdesk_id | happiness_rating | lemmatized | lemma_str | sentiment | |
|---|---|---|---|---|---|
| 3 | 2076 | NaN | [submit, final, project, evaluation, process, complete] | submit final project evaluation process complete | 0.050000 |
| 6 | 2079 | NaN | [opt, full, stack, web, development, course, wednesday, enrol, come, know, nothing, java, devlop... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... | 0.412500 |
| 9 | 2082 | NaN | [unable, play, record, video, could, check, soon, possible, chandan] | unable play record video could check soon possible chandan | -0.250000 |
| 11 | 2084 | NaN | [assignment, get, verify, able, get, certificate, look, please] | assignment get verify able get certificate look please | 0.500000 |
| 13 | 2087 | Good | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... | face difficulty submit question execution give error message submit fails test case let know wro... | -0.142857 |
df3.to_csv('qa_sentiment_analysis_EDA_senti_category.csv')
df4 = pd.read_csv('qa_sentiment_analysis_EDA_senti_category.csv')
df4.head()
| freshdesk_id | happiness_rating | rating | lemmatized | lemma_str | sentiment | senti_category | |
|---|---|---|---|---|---|---|---|
| 0 | 2076 | NaN | 0 | ['submit', 'final', 'project', 'evaluation', 'process', 'complete'] | submit final project evaluation process complete | 0.050000 | Positive |
| 1 | 2079 | NaN | 0 | ['opt', 'full', 'stack', 'web', 'development', 'course', 'wednesday', 'enrol', 'come', 'know', '... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... | 0.412500 | Positive |
| 2 | 2082 | NaN | 0 | ['unable', 'play', 'record', 'video', 'could', 'check', 'soon', 'possible', 'chandan'] | unable play record video could check soon possible chandan | -0.250000 | Negative |
| 3 | 2084 | NaN | 0 | ['assignment', 'get', 'verify', 'able', 'get', 'certificate', 'look', 'please'] | assignment get verify able get certificate look please | 0.500000 | Positive |
| 4 | 2087 | Good | 5 | ['face', 'difficulty', 'submit', 'question', 'execution', 'give', 'error', 'message', 'submit', ... | face difficulty submit question execution give error message submit fails test case let know wro... | -0.142857 | Negative |
plt.figure(figsize=(50,30))
plt.margins(0.02)
plt.xlabel('Sentiment', fontsize=50)
plt.xticks(fontsize=40)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.hist(df4['sentiment'], bins=50)
plt.title('Sentiment Distribution', fontsize=60)
plt.show()
plt.figure(figsize=(50,30))
plt.margins(0.02)
plt.xlabel('Sentiment Category', fontsize=50)
plt.xticks(fontsize=40)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.hist(df4['senti_category'],bins=10)
plt.title('Sentiment Distribution', fontsize=60)
plt.show()
x_rating = df4.happiness_rating.value_counts()
y_rating = x_rating.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(x_rating.index, x_rating.values, alpha=0.8)
plt.title("Happiness Rating Distribution", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Happiness Ratings', fontsize=50)
plt.xticks(fontsize=40)
(array([0, 1, 2]), [Text(0, 0, 'Good'), Text(1, 0, 'Bad'), Text(2, 0, 'Okay')])
x_rating
Good 52612 Bad 3760 Okay 2082 Name: happiness_rating, dtype: int64
plt.figure(figsize=(30,10))
plt.title('Percentage of Ratings', fontsize=20)
df4.happiness_rating.value_counts().plot(kind='pie', labels=['Good', 'Bad', 'Okay'],
wedgeprops=dict(width=.7), autopct='%1.0f%%', startangle= -20,
textprops={'fontsize': 15})
<AxesSubplot:title={'center':'Percentage of Ratings'}, ylabel='happiness_rating'>
plt.figure(figsize=(30,10))
plt.title('Sentiment Category Distribution', fontsize=20)
df4.senti_category.value_counts().plot(kind='pie', labels=['Neutral','Positive','Negative'],
wedgeprops=dict(width=.7), autopct='%1.0f%%', startangle= -20,
textprops={'fontsize': 15})
<AxesSubplot:title={'center':'Sentiment Category Distribution'}, ylabel='senti_category'>
polarity_avg = df4.groupby('happiness_rating')['sentiment'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Rating', fontsize=45)
plt.ylabel('Average Sentiment', fontsize=45)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Sentiment per Happiness Rating Distribution', fontsize=50)
plt.show()
polarity_avg = df4.groupby('senti_category')['rating'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Sentiment Category', fontsize=45)
plt.ylabel('Average Rating', fontsize=45)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Rating per Sentiment Category Distribution', fontsize=50)
plt.show()
df4['word_count'] = df4['lemmatized'].apply(lambda x: len(str(x).split()))
df4['review_len'] = df4['lemma_str'].astype(str).apply(len)
df4
| freshdesk_id | happiness_rating | rating | lemmatized | lemma_str | sentiment | senti_category | word_count | review_len | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2076 | NaN | 0 | ['submit', 'final', 'project', 'evaluation', 'process', 'complete'] | submit final project evaluation process complete | 0.050000 | Positive | 6 | 48 |
| 1 | 2079 | NaN | 0 | ['opt', 'full', 'stack', 'web', 'development', 'course', 'wednesday', 'enrol', 'come', 'know', '... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... | 0.412500 | Positive | 31 | 211 |
| 2 | 2082 | NaN | 0 | ['unable', 'play', 'record', 'video', 'could', 'check', 'soon', 'possible', 'chandan'] | unable play record video could check soon possible chandan | -0.250000 | Negative | 9 | 58 |
| 3 | 2084 | NaN | 0 | ['assignment', 'get', 'verify', 'able', 'get', 'certificate', 'look', 'please'] | assignment get verify able get certificate look please | 0.500000 | Positive | 8 | 54 |
| 4 | 2087 | Good | 5 | ['face', 'difficulty', 'submit', 'question', 'execution', 'give', 'error', 'message', 'submit', ... | face difficulty submit question execution give error message submit fails test case let know wro... | -0.142857 | Negative | 129 | 1094 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 421823 | 1020407 | NaN | 0 | ['request', 'update', 'pmi', 'exams', 'pattern', 'change', 'muralikrishna'] | request update pmi exams pattern change muralikrishna | 0.000000 | Neutral | 7 | 53 |
| 421824 | 1020408 | NaN | 0 | ['need', 'reference', 'note', 'module', 'reference'] | need reference note module reference | 0.000000 | Neutral | 5 | 36 |
| 421825 | 1020409 | NaN | 0 | ['would', 'like', 'reattend', 'aws', 'class', 'look', 'let', 'know', 'timing', 'n'] | would like reattend aws class look let know timing n | 0.000000 | Neutral | 10 | 52 |
| 421826 | 1020411 | NaN | 0 | ['extend', 'lab', 'access', 'beyond', 'initial', 'day'] | extend lab access beyond initial day | 0.000000 | Neutral | 6 | 36 |
| 421827 | 1020412 | NaN | 0 | ['give', 'retake', 'assignment', 'nlp', 'module'] | give retake assignment nlp module | 0.000000 | Neutral | 5 | 33 |
421828 rows × 9 columns
letter_avg = df4.groupby('happiness_rating')['review_len'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Rating', fontsize=35)
plt.ylabel('Count of Letters in Rating', fontsize=35)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Number of Letters per Rating Distribution', fontsize=40)
plt.show()
letter_avg = df4.groupby('senti_category')['review_len'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Sentiment Category', fontsize=35)
plt.ylabel('Count of Letters', fontsize=35)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Number of Letters per Sentiment Category Distribution', fontsize=40)
plt.show()
word_avg = df4.groupby('happiness_rating')['word_count'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Rating', fontsize=35)
plt.ylabel('Count of Words in Rating', fontsize=35)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Number of Words per Rating Distribution', fontsize=40)
plt.show()
word_avg = df4.groupby('senti_category')['word_count'].mean().plot(kind='bar', figsize=(50,30))
plt.xlabel('Sentiment Category', fontsize=35)
plt.ylabel('Count of Words', fontsize=35)
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
plt.title('Average Number of Words per Sentiment Category Distribution', fontsize=40)
plt.show()
#df4['rating'] = df4['rating'].str.replace(r" ", '')
#pd.to_numeric(df4['rating'])
corr = df4[['sentiment', 'review_len', 'word_count','rating']].corr()
plt.figure(figsize=(50,30))
plt.xticks(fontsize=40)
plt.yticks(fontsize=40)
sns.heatmap(corr, cmap='coolwarm', annot=True, annot_kws={"size": 40}, linewidths=10, vmin=-1.5)
<AxesSubplot:>
words = df3['lemmatized']
allwords = []
for wordlist in words:
allwords += wordlist
print(allwords)
IOPub data rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_data_rate_limit`. Current values: NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec) NotebookApp.rate_limit_window=3.0 (secs)
mostcommon = FreqDist(allwords).most_common(100)
wordcloud = WordCloud(width=1600, height=800, background_color='white').generate(str(mostcommon))
fig = plt.figure(figsize=(30,10), facecolor='white')
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
plt.title('Top Most Common Words', fontsize=100)
plt.tight_layout(pad=0)
plt.show()
#mostcommon = FreqDist(words).most_common(100)
wordcloud = WordCloud(width=1600, height=800, background_color='white').generate(str(words))
fig = plt.figure(figsize=(30,10), facecolor='white')
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
plt.title('Top 100 Most Common Words', fontsize=100)
plt.tight_layout(pad=0)
plt.show()
wordcloud = WordCloud(width=1600, height=800, background_color='white').generate(str(allwords))
fig = plt.figure(figsize=(30,10), facecolor='white')
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
plt.title('Top 100 Most Common Words', fontsize=100)
plt.tight_layout(pad=0)
plt.show()
mostcommon_small = FreqDist(allwords).most_common(25)
x, y = zip(*mostcommon_small)
plt.figure(figsize=(50,30))
plt.margins(0.02)
plt.bar(x, y)
plt.xlabel('Words', fontsize=50)
plt.ylabel('Frequency of Words', fontsize=50)
plt.yticks(fontsize=40)
plt.xticks(rotation=60, fontsize=40)
plt.title('Frequency of 25 Most Common Words', fontsize=60)
plt.show()
group_by = df3.groupby('happiness_rating')['lemma_str'].apply(lambda x: Counter(' '.join(x).split()).most_common(25))
group_by_0 = group_by.iloc[0]
words0 = list(zip(*group_by_0))[0]
freq0 = list(zip(*group_by_0))[1]
plt.figure(figsize=(50,30))
plt.bar(words0, freq0)
plt.xlabel('Words', fontsize=50)
plt.ylabel('Frequency of Words', fontsize=50)
plt.yticks(fontsize=40)
plt.xticks(rotation=60, fontsize=40)
plt.title('Frequency of 25 Most Common Words for Happiness Rating="Bad"', fontsize=60)
plt.show()
group_by = df3.groupby('happiness_rating')['lemma_str'].apply(lambda x: Counter(' '.join(x).split()).most_common(25))
group_by
happiness_rating Bad [(course, 1594), (class, 1033), (get, 892), (batch, 724), (font, 692), (assignment, 600), (need,... Good [(course, 15501), (batch, 12587), (assignment, 11789), (project, 10004), (class, 9336), (get, 92... Okay [(course, 530), (class, 487), (get, 481), (assignment, 396), (help, 350), (test, 302), (need, 28... Name: lemma_str, dtype: object
group_by_1 = group_by.iloc[1]
words1 = list(zip(*group_by_1))[0]
freq1 = list(zip(*group_by_1))[1]
plt.figure(figsize=(50,30))
plt.bar(words1, freq1)
plt.xlabel('Words', fontsize=50)
plt.ylabel('Frequency of Words', fontsize=50)
plt.yticks(fontsize=40)
plt.xticks(rotation=60, fontsize=40)
plt.title('Frequency of 25 Most Common Words for Happiness Rating="Good"', fontsize=60)
plt.show()
group_by_2 = group_by.iloc[2]
words2 = list(zip(*group_by_2))[0]
freq2 = list(zip(*group_by_2))[1]
plt.figure(figsize=(50,30))
plt.bar(words2, freq2)
plt.xlabel('Words', fontsize=50)
plt.ylabel('Frequency of Words', fontsize=50)
plt.yticks(fontsize=40)
plt.xticks(rotation=60, fontsize=40)
plt.title('Frequency of 25 Most Common Words for Happiness Rating="Okay"', fontsize=60)
plt.show()
df_t = df3
df_t.dropna(subset=["happiness_rating"],inplace=True)
df_t.isna().sum()
df_t
| freshdesk_id | happiness_rating | lemmatized | lemma_str | sentiment | |
|---|---|---|---|---|---|
| 13 | 2087 | Good | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... | face difficulty submit question execution give error message submit fails test case let know wro... | -0.142857 |
| 14 | 2088 | Bad | [tire, lot, able, figure, proceed, maybe, syntax, group, percentile, clear, guide] | tire lot able figure proceed maybe syntax group percentile clear guide | 0.300000 |
| 62 | 2141 | Good | [need, help, step, assignment, build, random, forest, classifier, preferably, perform, step, tog... | need help step assignment build random forest classifier preferably perform step together use pi... | -0.266667 |
| 72 | 2152 | Good | [support, seem, bit, difficult, follow, topic, demo, bit, old, course, update, course, last, goo... | support seem bit difficult follow topic demo bit old course update course last good batch one pr... | 0.075000 |
| 77 | 2157 | Good | [need, pmp, certification, exam, train, bit, early, request, change, devops, certification, trai... | need pmp certification exam train bit early request change devops certification train plm mr kum... | 0.025000 |
| ... | ... | ... | ... | ... | ... |
| 424670 | 444364 | Good | [let, know, cloud, architect, master, program, batch, go, start, month, iam, look, batch, always... | let know cloud architect master program batch go start month iam look batch always say current b... | 0.000000 |
| 424678 | 444375 | Good | [package, deploy, application] | package deploy application | 0.000000 |
| 424679 | 444376 | Good | [submit, machine, learn, test, kindly, consider, manual, checking, code, machine, learn, test, m... | submit machine learn test kindly consider manual checking code machine learn test module | 0.600000 |
| 424688 | 444386 | Good | [iam, unable, see, big, data, architect, master, progam, course, check, note, register, techmast... | iam unable see big data architect master progam course check note register techmasters program | -0.250000 |
| 424693 | 444393 | Good | [data, bind, animation, topic, touch, yet, cover, next, weekend, class, due, date, help] | data bind animation topic touch yet cover next weekend class due date help | -0.062500 |
58454 rows × 5 columns
tf_vectorizer = CountVectorizer(max_df=0.9, min_df=25, max_features=5000)
tf = tf_vectorizer.fit_transform(df_t['lemma_str'].values.astype('U'))
tf_feature_names = tf_vectorizer.get_feature_names()
doc_term_matrix = pd.DataFrame(tf.toarray(), columns=list(tf_feature_names))
doc_term_matrix
| aa | aakash | ab | abhinav | abhishek | able | absent | absolutely | accept | acceptable | ... | yesterday | yet | yml | youtube | ytest | ytrain | yum | zero | zip | zone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58449 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58450 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58451 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58452 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 58453 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
58454 rows × 2355 columns
lda_model = LatentDirichletAllocation(n_components=10, learning_method='online', max_iter=500, random_state=0).fit(tf)
no_top_words = 10
def display_topics(model, feature_names, no_top_words):
for topic_idx, topic in enumerate(model.components_):
print("Topic %d:" % (topic_idx))
print(" ".join([feature_names[i]
for i in topic.argsort()[:-no_top_words - 1:-1]]))
display_topics(lda_model, tf_feature_names, no_top_words)
Topic 0: code error get attach link output try issue find help Topic 1: know access let module case study lab receive detail lm Topic 2: font px name color style rgb size span print value Topic 3: batch class would start session like attend record want request Topic 4: java org version system spark apache io https git selenium Topic 5: test assignment question submit solution learn check show mark code Topic 6: project assignment submit http upload content date library extend submission Topic 7: please help share error jenkins get use docker install try Topic 8: course certification certificate get complete program request training time python Topic 9: data file use big create box none explain set csv
pyLDAvis.enable_notebook()
panel = pyLDAvis.sklearn.prepare(lda_model, tf, tf_vectorizer, mds='tsne')
panel
tfidf_vectorizer = TfidfVectorizer(max_df=0.90, min_df =25, max_features=5000, use_idf=True)
tfidf = tfidf_vectorizer.fit_transform(df_t['lemma_str'])
tfidf_feature_names = tfidf_vectorizer.get_feature_names()
doc_term_matrix_tfidf = pd.DataFrame(tfidf.toarray(), columns=list(tfidf_feature_names))
doc_term_matrix_tfidf
| aa | aakash | ab | abhinav | abhishek | able | absent | absolutely | accept | acceptable | ... | yesterday | yet | yml | youtube | ytest | ytrain | yum | zero | zip | zone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.16534 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58449 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 58450 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 58451 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 58452 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 58453 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.276765 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
58454 rows × 2355 columns
nmf = NMF(n_components=10, random_state=0, alpha=.1, init='nndsvd').fit(tfidf)
display_topics(nmf, tfidf_feature_names, no_top_words)
Topic 0: assignment submit module complete review solution quiz show case still Topic 1: upload library content http ipynb zip file docx mid pdf Topic 2: batch request upcoming start change enroll shift next new weekend Topic 3: course certification training devops train complete aws want would python Topic 4: certificate get complete completion receive name workshop download generate know Topic 5: project submit extend date submission complete due time work need Topic 6: class record recording attend video session join today live see Topic 7: help need error get file access lab code case study Topic 8: please provide share call lab access help detail evaluate add Topic 9: test code question evaluate end mark learn machine kindly manually
nmf_topic_values = nmf.transform(tfidf)
df_t['nmf_topics'] = nmf_topic_values.argmax(axis=1)
lda_topic_values = lda_model.transform(tf)
df_t['lda_topics'] = lda_topic_values.argmax(axis=1)
lda_remap = {0: 'Coding Usage Error', 1: 'Lab Access',
2: 'Unknown - not related', 3: 'Class Recording', 4: 'Java Course Related',
5: 'Test Solution Related', 6: 'Project Submission Date Extension', 7: 'Software Installation Help',
8: 'Course Certification', 9: 'Course Data File'}
df_t['lda_topics'] = df_t['lda_topics'].map(lda_remap)
nmf_remap = {0: 'Assignment Complete Review Pending', 1: 'Upload Content', 2: 'Batch Shift Enrollment', 3: 'Course Material: Python',
4: 'Course Certification', 5: 'Project Submission Date Extension', 6: 'Class Recording', 7: 'Case Study Code',
8: 'Lab Access', 9: 'Project Evaluation'}
df_t['nmf_topics'] = df_t['nmf_topics'].map(nmf_remap)
df_t.to_csv('qa_sentiment_analysis_LDA_NMF_Topics.csv')
nmf_x = df_t['nmf_topics'].value_counts()
nmf_y = nmf_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_x, nmf_y.index)
plt.title("NMF Topic Distribution", fontsize=50)
plt.ylabel('Review Topics', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Frequency', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 2000., 4000., 6000., 8000., 10000., 12000., 14000.,
16000.]),
[Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, '')])
df_low_ratings = df_t.loc[(df_t['happiness_rating']=="Bad")]
nmf_low_x = df_low_ratings['nmf_topics'].value_counts()
nmf_low_y = nmf_low_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_low_x, nmf_low_y.index)
plt.title("NMF Topic Distribution for Low Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 200., 400., 600., 800., 1000., 1200.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_high_ratings = df_t.loc[(df_t['happiness_rating']=="Good")]
nmf_high_x = df_high_ratings['nmf_topics'].value_counts()
nmf_high_y = nmf_high_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_high_x, nmf_high_y.index)
plt.title("NMF Topic Distribution for High Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 2000., 4000., 6000., 8000., 10000., 12000., 14000.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_20k = df3.iloc[:20000]
df_20k
| freshdesk_id | happiness_rating | lemmatized | lemma_str | sentiment | |
|---|---|---|---|---|---|
| 3 | 2076 | NaN | [submit, final, project, evaluation, process, complete] | submit final project evaluation process complete | 0.050000 |
| 6 | 2079 | NaN | [opt, full, stack, web, development, course, wednesday, enrol, come, know, nothing, java, devlop... | opt full stack web development course wednesday enrol come know nothing java devloping side thin... | 0.412500 |
| 9 | 2082 | NaN | [unable, play, record, video, could, check, soon, possible, chandan] | unable play record video could check soon possible chandan | -0.250000 |
| 11 | 2084 | NaN | [assignment, get, verify, able, get, certificate, look, please] | assignment get verify able get certificate look please | 0.500000 |
| 13 | 2087 | Good | [face, difficulty, submit, question, execution, give, error, message, submit, fails, test, case,... | face difficulty submit question execution give error message submit fails test case let know wro... | -0.142857 |
| ... | ... | ... | ... | ... | ... |
| 20608 | 22816 | Good | [necessary, install, python, already, instal, anaconda] | necessary install python already instal anaconda | 0.000000 |
| 20609 | 22817 | NaN | [module, introduction, selenium, component, content, http, upload, library, fh, zip] | module introduction selenium component content http upload library fh zip | 0.000000 |
| 20610 | 22818 | NaN | [unable, share, download, able, see, certificate, lm, could, advice, step] | unable share download able see certificate lm could advice step | 0.000000 |
| 20611 | 22819 | NaN | [module, case, study, content, http, upload, library, fwgz, docx] | module case study content http upload library fwgz docx | 0.000000 |
| 20612 | 22820 | NaN | [module, case, study, content, http, upload, library, fezcpv, docx] | module case study content http upload library fezcpv docx | 0.000000 |
20000 rows × 5 columns
tf_vectorizer = CountVectorizer(max_df=0.9, min_df=25, max_features=5000)
tf = tf_vectorizer.fit_transform(df_20k['lemma_str'].values.astype('U'))
tf_feature_names = tf_vectorizer.get_feature_names()
doc_term_matrix = pd.DataFrame(tf.toarray(), columns=list(tf_feature_names))
doc_term_matrix
| able | accept | access | accord | accordingly | account | action | activate | actual | actually | ... | would | write | wrong | xlsx | xml | year | yes | yesterday | yet | zip | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19995 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19996 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 19997 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19999 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
20000 rows × 872 columns
lda_model = LatentDirichletAllocation(n_components=10, learning_method='online', max_iter=500, random_state=0).fit(tf)
no_top_words = 10
def display_topics(model, feature_names, no_top_words):
for topic_idx, topic in enumerate(model.components_):
print("Topic %d:" % (topic_idx))
print(" ".join([feature_names[i]
for i in topic.argsort()[:-no_top_words - 1:-1]]))
display_topics(lda_model, tf_feature_names, no_top_words)
Topic 0: use file data code create command import output write name Topic 1: solution assignment please view find record link download rar see Topic 2: content http upload library docx module assignment zip txt ipynb Topic 3: class batch know start want let session would course like Topic 4: project complete time certification course need could video new due Topic 5: course one data also get kindly provide master program ask Topic 6: get test help error team question try work support code Topic 7: case study module assignment submit review unix still mysql show Topic 8: file google pdf drive usp drivesdk table attach database create Topic 9: java question answer program look ticket org doc mark extend
pyLDAvis.enable_notebook()
panel = pyLDAvis.sklearn.prepare(lda_model, tf, tf_vectorizer, mds='tsne')
panel
tfidf_vectorizer = TfidfVectorizer(max_df=0.90, min_df =25, max_features=5000, use_idf=True)
tfidf = tfidf_vectorizer.fit_transform(df_20k['lemma_str'])
tfidf_feature_names = tfidf_vectorizer.get_feature_names()
doc_term_matrix_tfidf = pd.DataFrame(tfidf.toarray(), columns=list(tfidf_feature_names))
doc_term_matrix_tfidf
| able | accept | access | accord | accordingly | account | action | activate | actual | actually | ... | would | write | wrong | xlsx | xml | year | yes | yesterday | yet | zip | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 1 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 2 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 3 | 0.323326 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 4 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.096959 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19995 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 19996 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.278526 |
| 19997 | 0.289992 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 19998 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 19999 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
20000 rows × 872 columns
nmf = NMF(n_components=10, random_state=0, alpha=.1, init='nndsvd').fit(tfidf)
display_topics(nmf, tfidf_feature_names, no_top_words)
Topic 0: docx library upload http content module mod sql solution code Topic 1: case study module py solution http content library upload ii Topic 2: pdf do library upload http content py attach br submission Topic 3: assignment submit module unix review complete http content mysql still Topic 4: drive google file usp view drivesdk content http module solution Topic 5: txt library upload http content module unix solution br mod Topic 6: java library upload http content module casestudy do solution code Topic 7: zip upload library http content module file complete br attach Topic 8: batch course class get complete project please request certification help Topic 9: ipynb library upload http content please review py project notebook
nmf_topic_values = nmf.transform(tfidf)
df_20k['nmf_topics'] = nmf_topic_values.argmax(axis=1)
lda_topic_values = lda_model.transform(tf)
df_20k['lda_topics'] = lda_topic_values.argmax(axis=1)
lda_remap = {0: 'Coding Usage', 1: 'Assignment Solution',
2: 'Course Materials', 3: 'Class Batch Session', 4: 'Project Completion and Course Certification',
5: 'Course Content', 6: 'Test Help', 7: 'Assignment Submission', 8: 'Course Content: G-Drive issue', 9: 'Java Question Answer'}
df_20k['lda_topics'] = df_20k['lda_topics'].map(lda_remap)
nmf_remap = {0: 'Content Related', 1: 'Case Study Solution', 2: 'Project Submission', 3: 'Assignment Submission Related',
4: 'Course Content: G-Drive issue', 5: 'Module Solution', 6: 'Case Study Code Solution', 7: 'Course Content',
8: 'Certification Related', 9: 'Project Review Related'}
df_20k['nmf_topics'] = df_20k['nmf_topics'].map(nmf_remap)
nmf_x = df_20k['nmf_topics'].value_counts()
nmf_y = nmf_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_x, nmf_y.index)
plt.title("NMF Topic Distribution", fontsize=50)
plt.ylabel('Review Topics', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Frequency', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 1000., 2000., 3000., 4000., 5000., 6000., 7000.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_low_ratings = df_20k.loc[(df_20k['happiness_rating']=="Bad")]
nmf_low_x = df_low_ratings['nmf_topics'].value_counts()
nmf_low_y = nmf_low_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_low_x, nmf_low_y.index)
plt.title("NMF Topic Distribution for Low Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 20., 40., 60., 80., 100., 120.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_high_ratings = df_20k.loc[(df_20k['happiness_rating']=="Good")]
nmf_high_x = df_high_ratings['nmf_topics'].value_counts()
nmf_high_y = nmf_high_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_high_x, nmf_high_y.index)
plt.title("NMF Topic Distribution for High Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 100., 200., 300., 400., 500., 600., 700., 800., 900.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_40k = df3.iloc[20001:40000]
df_40k
| freshdesk_id | happiness_rating | lemmatized | lemma_str | sentiment | |
|---|---|---|---|---|---|
| 20614 | 22822 | NaN | [wait, selenium, module, content, http, upload, library, fklon, zip] | wait selenium module content http upload library fklon zip | 0.000000 |
| 20615 | 22823 | NaN | [ill, get, certification, course] | ill get certification course | -0.500000 |
| 20616 | 22824 | NaN | [module, content, http, upload, library, fxiqs, docx] | module content http upload library fxiqs docx | 0.000000 |
| 20617 | 22825 | NaN | [module, content, http, upload, library, fbyro, docx] | module content http upload library fbyro docx | 0.000000 |
| 20618 | 22826 | NaN | [module, content, http, upload, library, fqvhpq, docx] | module content http upload library fqvhpq docx | 0.000000 |
| ... | ... | ... | ... | ... | ... |
| 41362 | 43656 | NaN | [last, class, module, trainer, tell, error, cucumber, runner, file, cnnot, execute, text, kindly... | last class module trainer tell error cucumber runner file cnnot execute text kindly provide info... | 0.300000 |
| 41363 | 43657 | NaN | [insert, value, db, hibernate, get, error, call, org, hibernate, service, spi, serviceexception,... | insert value db hibernate get error call org hibernate service spi serviceexception unable creat... | 0.138889 |
| 41364 | 43658 | NaN | [due, date, assignment, show, active, since, yesterday, pls, make, live, also, submit] | due date assignment show active since yesterday pls make live also submit | -0.040657 |
| 41365 | 43659 | NaN | [kindly, help, resolve, error] | kindly help resolve error | 0.600000 |
| 41366 | 43660 | NaN | [try, submit, assignment, upload, file, one, error, come, error, occur, reload, page, try, also,... | try submit assignment upload file one error come error occur reload page try also try refresh pa... | 0.250000 |
19999 rows × 5 columns
tf_vectorizer = CountVectorizer(max_df=0.9, min_df=25, max_features=5000)
tf = tf_vectorizer.fit_transform(df_40k['lemma_str'].values.astype('U'))
tf_feature_names = tf_vectorizer.get_feature_names()
doc_term_matrix = pd.DataFrame(tf.toarray(), columns=list(tf_feature_names))
doc_term_matrix
| able | accept | access | accord | accordingly | account | accuracy | action | activate | active | ... | write | wrong | xlsx | xml | year | yes | yesterday | yet | youtube | zip | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19994 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19995 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19996 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 19997 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 19998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
19999 rows × 995 columns
lda_model = LatentDirichletAllocation(n_components=10, learning_method='online', max_iter=500, random_state=0).fit(tf)
no_top_words = 10
def display_topics(model, feature_names, no_top_words):
for topic_idx, topic in enumerate(model.components_):
print("Topic %d:" % (topic_idx))
print(" ".join([feature_names[i]
for i in topic.argsort()[:-no_top_words - 1:-1]]))
display_topics(lda_model, tf_feature_names, no_top_words)
Topic 0: content http upload library assignment zip docx module txt please Topic 1: attach unable able machine import vm create table access get Topic 2: project submit review do submission solution final twb html verify Topic 3: file google drive share font view step color rgb data Topic 4: use java file error run create org user command apache Topic 5: work find know class video add please let record also Topic 6: ipynb complete show test certificate access lab answer email evaluate Topic 7: content http case upload library study module py twbx java Topic 8: course batch class certification start time would like training request Topic 9: assignment get help code question issue need give call error
pyLDAvis.enable_notebook()
panel = pyLDAvis.sklearn.prepare(lda_model, tf, tf_vectorizer, mds='tsne')
panel
tfidf_vectorizer = TfidfVectorizer(max_df=0.90, min_df =25, max_features=5000, use_idf=True)
tfidf = tfidf_vectorizer.fit_transform(df_40k['lemma_str'])
tfidf_feature_names = tfidf_vectorizer.get_feature_names()
doc_term_matrix_tfidf = pd.DataFrame(tfidf.toarray(), columns=list(tfidf_feature_names))
doc_term_matrix_tfidf
| able | accept | access | accord | accordingly | account | accuracy | action | activate | active | ... | write | wrong | xlsx | xml | year | yes | yesterday | yet | youtube | zip | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.329128 |
| 1 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 2 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 3 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 4 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19994 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 19995 | 0.092072 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.294219 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 19996 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.399729 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.354195 | 0.0 | 0.0 | 0.000000 |
| 19997 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
| 19998 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | ... | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 |
19999 rows × 995 columns
nmf = NMF(n_components=10, random_state=0, alpha=.1, init='nndsvd').fit(tfidf)
display_topics(nmf, tfidf_feature_names, no_top_words)
Topic 0: zip library upload http content project java file complete please Topic 1: case study py solution ii content upload library http twb Topic 2: docx library upload http content project please complete solution review Topic 3: ipynb library upload http content please review casestudy mod submit Topic 4: batch course project class certification complete get request help submit Topic 5: assignment submit pdf http content upload library complete html attach Topic 6: txt library upload http content do java py text pdf Topic 7: module rar content library http upload java casestudy twb answer Topic 8: file drive google usp view share drivesdk content http id Topic 9: twbx upload library http content pdf java review solution twb
nmf_topic_values = nmf.transform(tfidf)
df_40k['nmf_topics'] = nmf_topic_values.argmax(axis=1)
lda_topic_values = lda_model.transform(tf)
df_40k['lda_topics'] = lda_topic_values.argmax(axis=1)
lda_remap = {0: 'Assignment Content Upload', 1: 'Coding Usage',
2: 'Project Submission', 3: 'Course Content: G-Drive issue', 4: 'Coding Usage',
5: 'Class Recording', 6: 'Lab Access Help', 7: 'Course Content', 8: 'Course Certification Related', 9: 'Assignment Solution'}
df_40k['lda_topics'] = df_40k['lda_topics'].map(lda_remap)
nmf_remap = {0: 'Project Submission', 1: 'Case Study Solution', 2: 'Project Review', 3: 'Case Study',
4: 'Certification Project', 5: 'Assignment Submission', 6: 'Uploaded Content', 7: 'Case Study Solution',
8: 'Course Content: G-Drive issue', 9: 'Java Question Answer'}
df_40k['nmf_topics'] = df_40k['nmf_topics'].map(nmf_remap)
nmf_x = df_40k['nmf_topics'].value_counts()
nmf_y = nmf_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_x, nmf_y.index)
plt.title("NMF Topic Distribution", fontsize=50)
plt.ylabel('Review Topics', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Frequency', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 1000., 2000., 3000., 4000., 5000., 6000., 7000., 8000.,
9000.]),
[Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, ''),
Text(0, 0, '')])
df_low_ratings = df_40k.loc[(df_40k['happiness_rating']=="Bad")]
nmf_low_x = df_low_ratings['nmf_topics'].value_counts()
nmf_low_y = nmf_low_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_low_x, nmf_low_y.index)
plt.title("NMF Topic Distribution for Low Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 20., 40., 60., 80., 100., 120.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
df_high_ratings = df_40k.loc[(df_40k['happiness_rating']=="Good")]
nmf_high_x = df_high_ratings['nmf_topics'].value_counts()
nmf_high_y = nmf_high_x.sort_index()
plt.figure(figsize=(50,30))
sns.barplot(nmf_high_x, nmf_high_y.index)
plt.title("NMF Topic Distribution for High Ratings", fontsize=50)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.xlabel('Review Topics', fontsize=50)
plt.xticks(fontsize=40)
(array([ 0., 200., 400., 600., 800., 1000., 1200., 1400.]), [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])